import pandas as pd
import re

# Pre-process the scraped listing CSV: strip whitespace from the text columns,
# pull district / area / layout / orientation / floor level out of the
# '/'-separated ``house_info`` string, average ranged numbers, and write the
# reduced table to ``house_info_pre.csv``.

# Placeholder stored whenever a field cannot be extracted or is missing.
_MISSING = '无'

# Patterns are raw strings so that ``\d``, ``\-`` and ``\.`` reach the regex
# engine without triggering Python's invalid-escape-sequence warning.
_DISTRICT_RE = re.compile(r'([\u4e00-\u9fa5]{2}区)-')
_AREA_RE = re.compile(r'/([\-\d\.].*?)㎡/')
_HOUSE_TYPE_RE = re.compile(r'/(\d室\d厅\d卫)/')
_FACE_RE = re.compile(r'/([东南西北]{1,2})/')
_FLOOR_RE = re.compile(r'[高中低]楼层')


def _strip_whitespace(text):
    """Return *text* without newlines, carriage returns or spaces.

    Missing cells (NaN/None) become an empty string instead of raising
    ``AttributeError``; non-string cells are converted with ``str`` first.
    """
    if pd.isna(text):
        return ''
    return str(text).replace('\n', '').replace('\r', '').replace(' ', '')


def _first_match(pattern, text, default=_MISSING):
    """Return the first match of *pattern* in *text*, or *default* if none.

    For a pattern with one capture group the group is returned; for a pattern
    without groups the whole match is returned. Only the first match is used
    so that repeated segments are not concatenated into a meaningless value.
    """
    found = pattern.search(text)
    if found is None:
        return default
    # pattern.groups is 0 (whole match) or 1 (the single capture group).
    return found.group(pattern.groups)


def _mean_of_range(text):
    """Return the rounded mean of a '-'-separated list of numbers.

    ``'50'`` gives 50 and ``'50-60'`` gives 55. Raises ``ValueError`` when a
    part is not parseable as a float.
    """
    parts = [float(part) for part in text.split('-')]
    return round(sum(parts) / len(parts))


df = pd.read_csv('../static/data/house_info.csv')

df['title'] = df['title'].apply(_strip_whitespace)
df['house_info'] = df['house_info'].apply(_strip_whitespace)

# District: two CJK characters followed by '区' and a '-' separator.
# The value is therefore either such a three-character name or the
# placeholder, so no further filtering of the address is needed.
df['address'] = df['house_info'].apply(lambda info: _first_match(_DISTRICT_RE, info))

# Area: a number or 'low-high' range before '㎡'; defaults to 0 when absent.
df['area'] = df['house_info'].apply(
    lambda info: _mean_of_range(_first_match(_AREA_RE, info, default='0'))
)

df['house_type'] = df['house_info'].apply(lambda info: _first_match(_HOUSE_TYPE_RE, info))

# Orientation: keep only the first character of the matched direction(s).
df['face'] = df['house_info'].apply(lambda info: _first_match(_FACE_RE, info)[0])

df['floor'] = df['house_info'].apply(lambda info: _first_match(_FLOOR_RE, info))

# Price: take the text before the first space, then average a 'low-high'
# range. ``str`` guards against the column having been parsed as numeric.
df['price'] = df['price'].apply(lambda raw: _mean_of_range(str(raw).split(' ')[0]))

# pd.isnull is an alias of pd.isna, so a single pass is sufficient.
df['tags'] = df['tags'].apply(lambda tag: _MISSING if pd.isna(tag) else tag)

df_new = df[['title', 'address', 'area', 'house_type', 'face', 'floor', 'price', 'tags']]
df_new.to_csv('../static/data/house_info_pre.csv', index=False)
